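# Title     : Gradient boosting (gbm) classification of samples
# Objective : Fit a multinomial gbm model on the metabolites listed in
#             true_boruta_after.txt, then export per-sample predictions,
#             variable importance and a prediction summary table
# Created by: Administrator
# Created on: 2019/7/24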

library(ggrepel)
library(ropls)
library(pROC)
library(egg)
library(randomForest)
library(Boruta)
library(magrittr)
library(optparse)
library(gbm)
library(caret)
library(tidyverse)

#' Create a directory unless it already exists.
#'
#' Missing parent directories are created as well, so nested paths such as
#' "out/a/b" work even when "out" does not exist yet.
#'
#' @param f Path of the directory to ensure.
#' @return `FALSE` when the directory already existed (nothing was done);
#'   otherwise `TRUE` if the directory was created and `FALSE` if creation
#'   failed (`dir.create()` emits a warning in that case).
createWhenNoExist <- function(f) {
  if (dir.exists(f)) {
    return(FALSE)
  }
  # isTRUE() turns dir.create()'s invisible result into a visible logical,
  # matching what the previous `&&` expression returned.
  isTRUE(dir.create(f, recursive = TRUE))
}

# Command-line options. Each one is a character path with a default file name:
#   --i   metabolite table
#   --g   sample/group table
#   --sc  sample colour file (not read anywhere in the visible script)
option_list <- list(
  make_option(
    "--i",
    type = "character",
    default = "AllMet.csv",
    help = "metabolite data file"
  ),
  make_option(
    "--g",
    type = "character",
    default = "SampleInfo.csv",
    help = "sample group file"
  ),
  make_option(
    "--sc",
    type = "character",
    default = "sample_color.txt",
    help = "sample color file"
  )
)
# Parse the arguments given to the script; values are read later as
# opt$i and opt$g.
cliParser <- OptionParser(option_list = option_list)
opt <- parse_args(cliParser)

# Print numbers with three significant digits for the rest of the session.
options(digits = 3)

# Sample annotation: keep only the sample identifier and its class label.
sampleInfo <- read.csv(opt$g, header = TRUE, stringsAsFactors = FALSE) %>%
  select(c("SampleID", "ClassNote"))

head(sampleInfo)

# Output directory (the current working directory).
parent <- "./"
createWhenNoExist(parent)

# List of selected metabolites produced by an earlier pipeline step.
fileName <- "../../03/true_boruta_after.txt"

# Without the feature list there is nothing to model. This is treated as a
# normal (status 0) exit, but say why so the run is not silently empty.
if (!file.exists(fileName)) {
  message("Feature file '", fileName, "' not found; skipping gbm modelling.")
  quit(status = 0)
}

# Names of the metabolites to use as model features.
diffNames <- read_tsv(fileName) %>%
  .$Metabolite

# Build the modelling table: one row per sample, one column per selected
# metabolite, plus the class label as a factor.
#
# check.names = FALSE keeps the sample column headers exactly as written in
# the file. With the default (TRUE) R rewrites headers such as "S-1" or "1A"
# to "S.1" / "X1A", which then fail to match sampleInfo$SampleID in the join
# below and silently drop those samples.
data <- read.csv(opt$i, header = TRUE, check.names = FALSE) %>%
  select(-c("HMDB", "KEGG", "Class")) %>%
  filter(Metabolite %in% diffNames) %>%
  # Transpose: metabolites-in-rows -> samples-in-rows.
  gather("SampleID", "Value", -Metabolite) %>%
  spread(Metabolite, "Value") %>%
  inner_join(sampleInfo, by = c("SampleID")) %>%
  # Class levels follow their order of first appearance in the joined table.
  mutate(ClassNote = factor(ClassNote, levels = unique(ClassNote))) %>%
  as.data.frame() %>%
  column_to_rownames("SampleID")

# Fail early with a readable message instead of an obscure model-fitting error.
if (nrow(data) == 0) {
  stop(
    "No samples left after joining '", opt$i, "' with '", opt$g,
    "'; check that the sample IDs match.",
    call. = FALSE
  )
}

# Predictor matrix (all metabolite columns) and response vector.
x <- data %>% select(-c("ClassNote"))
y <- data$ClassNote

# Minimum number of observations per terminal node, derived from the sample
# count. NOTE(review): the 0.5 presumably mirrors bag.fraction below so that
# gbm's data-set-size check passes -- confirm.
halfSampleCount <- nrow(data) * 0.5
minobsinnode <- ceiling((halfSampleCount - 1) / 2 - 1)

# Number of boosting iterations, used for both fitting and prediction.
nTrees <- 400

# Fit a multinomial gradient boosting model on all metabolite columns.
gbRs <- gbm(
  ClassNote ~ .,
  data = data,
  distribution = "multinomial",
  n.trees = nTrees,
  interaction.depth = 1,
  shrinkage = 0.03,
  n.minobsinnode = minobsinnode,
  bag.fraction = 0.5
)

# Class probabilities for the same samples the model was trained on.
predBST <- predict(gbRs, newdata = x, n.trees = nTrees, type = "response")

# Index of the most probable class per sample, then its class name.
p.predBST <- apply(predBST, MARGIN = 1, FUN = which.max)
predictRs <- colnames(predBST)[p.predBST]

# Label the rows of the probability array with the sample IDs. Later steps
# read these row names from predBST as well.
rownames(predBST) <- rownames(data)

# Per-sample prediction table: the highest class probability and the name of
# the predicted class. The row maximum is computed in one vectorized pass
# instead of building a data frame per row with rowwise()/do().
predDf <- tibble(
  SampleID = rownames(predBST),
  Probability = unname(apply(predBST, 1, max)),
  Prediction = predictRs
)

# Attach predictions to the sample annotation. Samples without a prediction
# keep NA. Probability is moved to the last column.
predictFinalDf <- sampleInfo %>%
  left_join(predDf, by = c("SampleID")) %>%
  rename(Sample = SampleID) %>%
  select(-c("Probability"), "Probability")
predictFinalDf
write.csv(predictFinalDf, "GB_Prediction.csv", row.names = FALSE)

# Per-sample table holding the predicted probability of the FIRST class
# column of predBST, together with the predicted class name.
# The probability array is flattened to a samples x columns matrix (the same
# column-major layout as.data.frame() produces) so its first column can be
# taken directly, replacing the per-row rowwise()/do() construction.
firstClassProb <- matrix(predBST, nrow = nrow(predBST))[, 1]
predDf1 <- tibble(
  SampleID = rownames(predBST),
  Value = unname(firstClassProb),
  Prediction = predictRs
)

# Attach to the sample annotation; samples without a prediction keep NA.
predictFinalDf1 <- sampleInfo %>%
  left_join(predDf1, by = c("SampleID"))
write_tsv(predictFinalDf1, "Classification_Result.txt")

# Variable importance of the fitted model, computed over 400 trees.
# NOTE(review): confirm that the varImp method for gbm models honours `scale`.
varImp <- gbRs %>% varImp(scale = TRUE, numTrees = 400)

# Tidy the importance table: metabolite names become a column, the score is
# renamed to VarImp, and rows are sorted from most to least important.
varImpDf <- rownames_to_column(varImp, var = "Metabolite")
varImpDf <- rename(varImpDf, VarImp = Overall)
varImpDf <- arrange(varImpDf, desc(VarImp))
# Drop backticks from the names (presumably added around non-syntactic
# column names by the formula interface -- verify).
varImpDf <- mutate(varImpDf, Metabolite = str_replace_all(Metabolite, "`", ""))

write.csv(varImpDf, file = "GB_VarImp.csv", row.names = FALSE)

# Confusion table: actual class (rows) vs. predicted class (columns).
# Both sides are converted to factors sharing one sorted level set, so the
# table is always square and aligned even when some class is never predicted;
# otherwise diag() below would pair up the wrong cells.
classLevels <- sort(unique(c(
  as.character(predictFinalDf$ClassNote),
  as.character(predictFinalDf$Prediction)
)))
pre_summary <- table(
  factor(predictFinalDf$ClassNote, levels = classLevels),
  factor(predictFinalDf$Prediction, levels = classLevels)
)

print(pre_summary)

# Wide form: one row per actual class (Var1), one column per predicted class.
summaryTb <- pre_summary %>%
  as.data.frame() %>%
  as_tibble() %>%
  spread(Var2, "Freq")
print("=log=")
print(summaryTb)

# Overall accuracy = correctly classified samples (diagonal) / all samples
# that have a prediction.
summaryMatrix <- summaryTb %>%
  select(-"Var1") %>%
  as.matrix()
diagSum <- sum(diag(summaryMatrix))
totalCount <- sum(summaryMatrix)
predictive <- (diagSum / totalCount) %>%
  round(3)

# The accuracy is shown once, in the first row; the remaining rows stay blank.
# The column is sized to the table so any number of classes works (the
# previous fixed two-element vector only fitted a two-class table).
accuracyColumn <- rep("", nrow(summaryTb))
if (length(accuracyColumn) > 0) {
  accuracyColumn[1] <- as.character(predictive)
}
finalSummaryTb <- summaryTb %>%
  mutate(`Model predictive accuracy` = accuracyColumn) %>%
  rename(` ` = Var1)
print(finalSummaryTb)

write_csv(finalSummaryTb, "GB_Prediction_Summary.csv")

# cTab = table(predictFinalDf$ClassNote, predictFinalDf$Prediction)
# odd_ratio = (cTab[1, 1] / cTab[1, 2]) / (cTab[2, 1] / cTab[2, 2])
# write.csv(odd_ratio, "Odd_Ratio.csv", row.names = F)

# train.control <- trainControl(method = "cv", number = 2, allowParallel = F)
# # gbRs <- gbm(ClassNote ~ ., data = data, distribution = "multinomial", n.trees = 400, interaction.depth = 1,
# # shrinkage = 0.03, n.minobsinnode = minobsinnode)
# tryCatch(model <<- train(ClassNote ~ ., data = data, distribution = "multinomial", n.trees = 400, interaction.depth = 1,
# shrinkage = 0.03, n.minobsinnode = minobsinnode, trControl = train.control, method = "gbm"),
# error = function(e) {
#     model <<- data.frame()
# })
# model
# write.csv(model$results, "model_cv.csv", row.names = F)

# model <- train(Fertility ~., data = swiss, method = "gbm", shrinkage = 0.1, n.minobsinnode = 10,
#  trControl = train.control)
# print(model)











